import jieba

# Word-frequency ranking for the text in "红楼梦.txt": segment the text with
# jieba, fold the different appellations of a character into one canonical
# name, drop the stop words below, and print the 100 most frequent entries.

# Frequent multi-character words that should not appear in the ranking.
excludes = {"什么", "一个", "我们", "那里", "如今", "你们", "说道", "起来",
            "姑娘", "这里", "出来", "他们", "众人", "奶奶", "自己", "一面",
            "太太", "只见", "怎么", "两个", "没有", "不是", "不知", "这个",
            "知道", "听见", "这样", "进来", "告诉", "东西", "咱们", "就是",
            "回来", "大家", "只是", "老爷", "只得", "丫头", "这些", "不敢",
            "出去", "所以", "不过"}

# Appellations that must be kept as single tokens by the segmenter.
custom_words = (
    '宝二爷', '凤辣子', '凤哥儿', '凤丫头', '二太太',
    '林妹妹', '林姑娘', '琏二爷', '宝丫头', '宝姑娘',
    '宝姐姐', '平姐姐', '平姑娘', '薛夫人', '姨太太',
)
# The words have to be registered BEFORE jieba.lcut() is called; registering
# them afterwards has no effect on a segmentation that is already done.
for custom_word in custom_words:
    jieba.add_word(custom_word)

# Maps each alias to the canonical name it is counted under.
aliases = {
    '宝玉': '贾宝玉', '宝二爷': '贾宝玉',
    '凤姐': '王熙凤', '凤姐儿': '王熙凤', '凤丫头': '王熙凤',
    '凤哥儿': '王熙凤', '凤辣子': '王熙凤',
    '老太太': '贾母', '老祖宗': '贾母',
    '宝钗': '薛宝钗', '宝姐姐': '薛宝钗', '宝姑娘': '薛宝钗',
    '宝丫头': '薛宝钗',
    '黛玉': '林黛玉', '林妹妹': '林黛玉', '林姑娘': '林黛玉',
    '二太太': '王夫人',
    '琏二爷': '贾琏',
    '平姐姐': '平儿', '平姑娘': '平儿',
    '薛夫人': '薛姨妈', '姨太太': '薛姨妈',
}

# Use a context manager so the file handle is closed as soon as it is read.
with open("红楼梦.txt", "r", encoding='gb18030') as source_file:
    txt = source_file.read()
words = jieba.lcut(txt)

counts = {}
for word in words:
    # Single-character tokens are not counted.
    if len(word) == 1:
        continue
    # Words without an alias entry are counted under their own text.
    rword = aliases.get(word, word)
    counts[rword] = counts.get(rword, 0) + 1

# pop() with a default tolerates stop words that never occur in the text,
# where `del` would raise KeyError.
for word in excludes:
    counts.pop(word, None)

# Rank by descending count; the sort is stable, so ties keep the order in
# which the words were first seen.
items = list(counts.items())
items.sort(key=lambda x: x[1], reverse=True)
print(len(items))
# Slicing caps the report at 100 rows without failing on shorter rankings.
for word, count in items[:100]:
    print("{0:<10}{1:>5}".format(word, count))
